{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import joblib\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "import xgboost as xgb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# valid\n",
    "valid_nodup = joblib.load('../../scrum_data/valid_data/valid_nodup.lz4')\n",
    "valid_tag = joblib.load('../../scrum_data/valid_data/valid_tag.lz4')\n",
    "valid_date = joblib.load('../../scrum_data/valid_data/valid_date.lz4')\n",
    "valid_week =joblib.load('../../scrum_data/valid_data/valid_week.lz4')\n",
    "valid_null_sign = joblib.load('../../scrum_data/valid_data/valid_null_sign90.lz4')\n",
    "valid_majcnt = joblib.load('../../scrum_data/valid_data/valid_majcnt.lz4')\n",
    "valid_cf_fs = joblib.load('../../scrum_data/valid_data/valid_cf_86_fs10.lz4')\n",
    "\n",
    "valid = pd.concat([valid_nodup,valid_tag,valid_date,valid_week,valid_null_sign,valid_majcnt,valid_cf_fs],axis=1,copy=False)\n",
    "x_valid = valid.fillna(-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "f_score = joblib.load('./f_score')\n",
    "\n",
    "valid_id = joblib.load('../../scrum_data/valid_data/valid_id.lz4')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict_prob(model,d_valid,test_id):\n",
    "    pred = pd.DataFrame()\n",
    "    pred['id'] = test_id\n",
    "    pred['prob'] = model.predict(d_valid)\n",
    "    return pred"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 330"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "cols = f_score[f_score!=0].index[:330]\n",
    "d_valid = xgb.DMatrix(x_valid[cols].values)\n",
    "\n",
    "xgb_model_330 = joblib.load('./model/xgb_model_330') \n",
    "pred = predict_prob(xgb_model_330,d_valid,valid_id)\n",
    "pred.to_csv('./pred/xgb_pred_330.txt',index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 0.82420104895477"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 400\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "cols = f_score[f_score!=0].index[:400]\n",
    "d_valid = xgb.DMatrix(x_valid[cols].values)\n",
    "\n",
    "xgb_model_400 = joblib.load('./model/xgb_model_400') \n",
    "pred = predict_prob(xgb_model_400,d_valid,valid_id)\n",
    "pred.to_csv('./pred/xgb_pred_400.txt',index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 460"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "cols = f_score[f_score!=0].index[:460]\n",
    "d_valid = xgb.DMatrix(x_valid[cols].values)\n",
    "\n",
    "xgb_model_460 = joblib.load('./model/xgb_model_460') \n",
    "pred = predict_prob(xgb_model_460,d_valid,valid_id)\n",
    "pred.to_csv('./pred/xgb_pred_460.txt',index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 0.8241621861799"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
